path_loc = "D:/Onedrive/SNCF/Analyses/sncf_memoire_analysis/results/"
setwd(path_loc)
options(scipen=999)
rm(path_loc)



I. Introduction



This file is used to produce all the analyses performed to validate the General flexibility Scale (Code R : nflex = new flexibility). It contains script to describe the sample (age, sexe…), items of the nflex scale,the exploratory factorial analysis (using polychoric correlation and principal axis factor analysis) and the confirmatory factorial analysis used to assess the internal structure of the other scale (polychoric correlations and diagonal weighted least square factor analysis).
A simulated dataset with same parameters is available at : https://github.com/Anghille/flexibility_memory_analysis


II. Setup

1. Loading librairies

wants <- c("psych","reshape","reshape2","car","carData","moments","tidyverse","stringr","plotly","readxl","GGally","grid","gridExtra","data.table","esquisse","Hmisc","psychTools","stargazer","polycor","lavaan","lavaanPlot","ggpubr","corrplot","outliers","GPArotation","jtools")

# IF R CRASHES, REMOVE "JTOOLS" FROM THE "wants" VECTOR AND INSTALL/CALL IT AFTER BY UNCOMMENTING THE LAST 2 LINES OF THIS CHUNK

has <- wants %in% rownames(installed.packages())
if(any(!has)) install.packages(wants[!has])
invisible(lapply(wants, library, character.only = TRUE))
rm(has, wants)

# Uncomment it if needed (see last comment for the "why")
# install.packages("jtools")
# library(jtools)


2. Loading database

# Change path were your dataset is downloaded
path = "D:/Onedrive/SNCF/Analyses/sncf_memoire_analysis/results/DATA.csv"
data = read.csv2(file = path)
rm(path)
head(data)


3. database cleaning

The goal here is to automatically delete subjects with missing data from one of the scale, or if there level of french isnt enough which might interfer with the french scale coprehension. It also normalize the strings of the dataset to lowercase and invert back the inverted items. there is 4 dataframe in total : * Raw data – data * Data with general flexibility items only – nflex * data with all scales cleaned of NAdata_cleaned_scale * Data with general flexibility item only and item inverted back – nflex_inv

# Rename some variables
x = 0
a = c("Situation","Mothertongue","French level","Pourcentage")
for(i in c("Situation.professionnelle.avant.le.confinement","Langue.maternelle","Niveau.de.franÃ.ais","Pourcentage_perseveration_error_count")){
  x = x+1
  names(data)[names(data)==i] = a[x]
}

# set column name to lowerstring and pourcentage column to numeric (will tranform empty cells to NA)
data = setnames(data, tolower(names(data)))
data$pourcentage = as.numeric(data$pourcentage)
## Warning: NAs introduits lors de la conversion automatique
# Create all the needed dataframes
data_cleaned_scale = data %>% drop_na(c(7:42)) %>% filter(!(situation=="Sans emploi")) #For scale analysis
data_cleaned_wcst = data %>% drop_na(c(7:48)) %>% filter(!(situation=="Sans emploi")) #For validation with wcst
nflex_inv = data_cleaned_scale[,c(21:30)] #For GCF (general flexibility scale) analysis

# Invert back item that are inverted in the scales
inverted_item_likert7 = c("cognitive_rigidity_1","att_switch_1","att_switch_2","att_switch_4","att_switch_9","att_switch_10","cfs_2","cfs_3","cfs_5","cfs_10","nflex_3","nflex_5","nflex_7","nflex_9")
inverted_item_likert4 = c("att_switch_1","att_switch_2","att_switch_4","att_switch_9","att_switch_10")
for(i in inverted_item_likert7){
    if(i %in% inverted_item_likert4){
      data_cleaned_scale[[i]] = 5-data_cleaned_scale[[i]]
      data_cleaned_wcst[[i]] = 5-data_cleaned_wcst[[i]]
    } else{
      data_cleaned_scale[[i]] = 7-data_cleaned_scale[[i]]
      data_cleaned_wcst[[i]] = 7-data_cleaned_wcst[[i]]
    }
}

#Remove unwanted variables
rm(a, inverted_item_likert4, inverted_item_likert7, x)

Note. We’ll be using nflex_inv for further analysis of the scale structure. We’ll be using data_cleaned_scale for the confirmatory factor analysis of the other scales.

III. Descriptive statistics

1. Sample description

# Shows frequencies of variables
t = describe(data$situation)
as.data.frame(t$values)
describe(data$sexe)
## data$sexe 
##        n  missing distinct 
##      387        0        3 
##                             
## Value      Autre Femme Homme
## Frequency      2   274   111
## Proportion 0.005 0.708 0.287
# Shows descriptive table
stargazer(as.data.frame(data), type="text")
## 
## =======================================================================
## Statistic             N   Mean  St. Dev.  Min  Pctl(25) Pctl(75)  Max  
## -----------------------------------------------------------------------
## age                  387 32.990  14.773   18      22       43      84  
## cognitive_rigidity_1 387 3.377   1.153     1      3        4       6   
## cognitive_rigidity_2 387 3.494   1.203     1      3        4       6   
## cognitive_rigidity_3 387 3.333   1.113     1      3        4       6   
## cognitive_rigidity_4 387 3.729   1.021     1      3        4       6   
## att_switch_1         387 2.638   0.719     1      2        3       4   
## att_switch_2         387 2.514   0.847     1      2        3       4   
## att_switch_3         387 2.894   0.850     1      2        3       4   
## att_switch_4         387 2.687   0.918     1      2        3       4   
## att_switch_5         387 2.775   0.915     1      2        3       4   
## att_switch_6         387 2.881   0.793     1      2        3       4   
## att_switch_7         387 3.078   0.788     1      3        4       4   
## att_switch_8         387 3.065   0.778     1      3        4       4   
## att_switch_9         387 2.855   0.922     1      2        4       4   
## att_switch_10        387 2.566   0.945     1      2        3       4   
## nflex_1              387 4.597   1.114     1      4        5       6   
## nflex_2              387 4.491   0.991     1      4        5       6   
## nflex_3              387 3.439   1.338     1      2        4       6   
## nflex_4              387 4.566   1.037     1      4        5       6   
## nflex_5              387 3.837   1.296     1      3        5       6   
## nflex_6              387 4.406   1.182     1      4        5       6   
## nflex_7              387 3.370   1.243     1      2        4       6   
## nflex_8              387 4.258   1.129     1      4        5       6   
## nflex_9              387 3.171   1.328     1      2        4       6   
## nflex_10             387 4.576   1.113     1      4        5       6   
## cfs_1                387 4.649   1.041     1      4        6       6   
## cfs_2                387 2.842   1.250     1      2        4       6   
## cfs_3                387 2.778   1.421     1      2        4       6   
## cfs_4                387 4.119   1.006     1      4        5       6   
## cfs_5                387 2.579   1.111     1      2        3       6   
## cfs_6                387 4.755   1.007     2      4        6       6   
## cfs_7                387 4.307   0.928     1      4        5       6   
## cfs_8                387 4.209   1.108     1      4        5       6   
## cfs_9                387 4.124   1.056     1      4        5       6   
## cfs_10               387 2.674   1.214     1      2        3       6   
## cfs_11               387 4.974   0.973     1      4        6       6   
## cfs_12               387 3.835   1.250     1      3        5       6   
## pourcentage          93  11.559  4.935   0.000  8.000    13.000  30.000
## -----------------------------------------------------------------------
# All data set description with 350 subjects
stargazer(data_cleaned_scale, type="text")
## 
## =======================================================================
## Statistic             N   Mean  St. Dev.  Min  Pctl(25) Pctl(75)  Max  
## -----------------------------------------------------------------------
## age                  350 30.831  12.311   18      22      37.8     71  
## cognitive_rigidity_1 350 3.580   1.167     1      3        4       6   
## cognitive_rigidity_2 350 3.489   1.213     1      3        4       6   
## cognitive_rigidity_3 350 3.303   1.112     1      3        4       6   
## cognitive_rigidity_4 350 3.723   1.033     1      3        4       6   
## att_switch_1         350 2.363   0.708     1      2        3       4   
## att_switch_2         350 2.477   0.845     1      2        3       4   
## att_switch_3         350 2.906   0.863     1      2       3.8      4   
## att_switch_4         350 2.309   0.925     1      2        3       4   
## att_switch_5         350 2.769   0.924     1      2        3       4   
## att_switch_6         350 2.880   0.810     1      2        3       4   
## att_switch_7         350 3.089   0.784     1      3        4       4   
## att_switch_8         350 3.043   0.787     1      3        4       4   
## att_switch_9         350 2.157   0.906     1      1        3       4   
## att_switch_10        350 2.426   0.933     1      2        3       4   
## nflex_1              350 4.583   1.106     1      4        5       6   
## nflex_2              350 4.491   0.969     2      4        5       6   
## nflex_3              350 3.471   1.319     1      3        4       6   
## nflex_4              350 4.554   1.039     1      4        5       6   
## nflex_5              350 3.134   1.301     1      2        4       6   
## nflex_6              350 4.394   1.180     1      4        5       6   
## nflex_7              350 3.637   1.245     1      3        5       6   
## nflex_8              350 4.254   1.131     1      4        5       6   
## nflex_9              350 3.829   1.335     1      3        5       6   
## nflex_10             350 4.583   1.114     1      4        6       6   
## cfs_1                350 4.660   1.052     1      4        6       6   
## cfs_2                350 4.151   1.245     1      3        5       6   
## cfs_3                350 4.197   1.405     1      3        5       6   
## cfs_4                350 4.097   1.000     1      4        5       6   
## cfs_5                350 4.414   1.098     1      4        5       6   
## cfs_6                350 4.757   0.988     2      4        6       6   
## cfs_7                350 4.331   0.930     1      4        5       6   
## cfs_8                350 4.209   1.112     1      4        5       6   
## cfs_9                350 4.157   1.055     1      4        5       6   
## cfs_10               350 4.291   1.209     1      4        5       6   
## cfs_11               350 4.980   0.982     1      4        6       6   
## cfs_12               350 3.831   1.240     1      3        5       6   
## pourcentage          87  11.494  4.969   0.000  8.000    13.000  30.000
## -----------------------------------------------------------------------
# Sample Age description 
statss = data_cleaned_scale %>% 
  group_by(sexe, situation) %>% 
  summarise(mean_age = mean(age), sd_age = sd(age))
# v = c(3,4,6,7)
# for(i in 1:4){
#   statss[v[i], 1]=NA
# }
stargazer(as.data.frame(statss), type="text", summary = F, digits = 2)
## 
## ===================================
##   sexe   situation  mean_age sd_age
## -----------------------------------
## 1 Autre  Etudiant    21.50    0.71 
## 2 Femme  Etudiant    22.21    2.30 
## 3 Femme Travailleur  38.04   12.77 
## 4 Homme  Etudiant    22.35    2.36 
## 5 Homme Travailleur  38.41   12.44 
## -----------------------------------
rm(statss)

describe(data_cleaned_scale$situation)
## data_cleaned_scale$situation 
##        n  missing distinct 
##      350        0        2 
##                                   
## Value         Etudiant Travailleur
## Frequency          161         189
## Proportion        0.46        0.54
describe(data_cleaned_scale$sexe)
## data_cleaned_scale$sexe 
##        n  missing distinct 
##      350        0        3 
##                             
## Value      Autre Femme Homme
## Frequency      2   253    95
## Proportion 0.006 0.723 0.271

Sexe and Situation distribution function of age :
# Subject's histogram

data_ggplot = data
data_ggplot$situation = factor(data_ggplot$situation, levels=c("Etudiant", "Travailleur", "Sans emploi"))
data_ggplot$sexe = factor(data_ggplot$sexe, levels=c("Femme", "Homme", "Autre"))

## For sexe/age
ggplotly(ggplot(data=data_ggplot, aes(x=age, fill=sexe)) + 
  geom_histogram(color="#e9ecef", alpha=.9, position ="identity",binwidth = 2) + 
  scale_fill_manual(values=c("#404080", "#b8c2cc", "#69b3a2")) +
  theme_apa() +
  theme(legend.position = "right") +
  labs(fill="Sexe") +
  scale_x_continuous(name="Age", breaks = seq(0, 85, 10)) +
  ylab("N"))
## For situation/age
ggplotly(ggplot(data=data_ggplot, aes(x=age, fill=situation))+
  geom_histogram(color="#e9ecef", alpha=.9, position ="identity",binwidth = 2) + 
  scale_fill_manual(values=c("#404080", "#b8c2cc", "#69b3a2")) +
  theme_apa() +
  theme(legend.position = "right") +
  ylab("N") +
  labs(fill="Situation") +
  scale_x_continuous(name="Age", breaks = seq(0, 85, 10)))
data_ggplot = data_cleaned_scale
data_ggplot$situation = factor(data_ggplot$situation, levels=c("Etudiant", "Travailleur", "Sans emploi"))
data_ggplot$sexe = factor(data_ggplot$sexe, levels=c("Femme", "Homme", "Autre"))

## For sexe/age
ggplotly(ggplot(data=data_ggplot, aes(x=age, fill=sexe)) + 
  geom_histogram(color="#e9ecef", alpha=.9, position ="identity",binwidth = 2) + 
  scale_fill_manual(values=c("#404080", "#b8c2cc", "#69b3a2")) +
  theme_apa() +
  theme(legend.position = "right") +
  labs(fill="Sexe") +
  scale_x_continuous(name="Age", breaks = seq(0, 85, 10)) +
  ylab("N"))
## For situation/age
ggplotly(ggplot(data=data_ggplot, aes(x=age, fill=situation))+
  geom_histogram(color="#e9ecef", alpha=.9, position ="identity",binwidth = 2) + 
  scale_fill_manual(values=c("#404080", "#b8c2cc", "#69b3a2")) +
  theme_apa() +
  theme(legend.position = "right") +
  ylab("N") +
  labs(fill="Situation") +
  scale_x_continuous(name="Age", breaks = seq(0, 85, 10)))
rm(data_ggplot)



2. General Cognitive Flexibility Scale

a. Histograms


Shows histogram for each item of the general cognitive flexibility scale. The goal is to check if any item seems to have an odd distribution, which would indicate that he isn’t suited to measure our flexibility. We therefore check item for ceiling or skewed items.

# Shows the general flexibility items' histograms
ggplot_data = nflex_inv
ggplot_data = ggplot_data %>% 
  rename(Q1 = nflex_1,
         Q2 = nflex_2,
         Q3 = nflex_3,
         Q4 = nflex_4,
         Q5 = nflex_5,
         Q6 = nflex_6,
         Q7 = nflex_7,
         Q8 = nflex_8,
         Q9 = nflex_9,
         Q10 = nflex_10)
ggplotly(ggplot(gather(ggplot_data), aes(x=value)) + 
  geom_histogram(stat = "count") + facet_wrap(~key, scales = 'free_x') +
  scale_color_grey() +
  theme_apa() +
  theme(legend.position = "bottom") +
  xlab("Questions") + ylab("N"))
## Warning: Ignoring unknown parameters: binwidth, bins, pad
ggplotly(ggplot(gather(data_cleaned_scale[,11:20]), aes(x=value)) + 
  geom_histogram(stat = "count") + facet_wrap(~key, scales = 'free_x') +
  scale_color_grey() +
  theme_apa() +
  theme(legend.position = "bottom") +
  xlab("Questions") + ylab("N"))
## Warning: Ignoring unknown parameters: binwidth, bins, pad
ggplotly(ggplot(gather(data_cleaned_scale[,7:10]), aes(x=value)) + 
  geom_histogram(stat = "count") + facet_wrap(~key, scales = 'free_x') +
  scale_color_grey() +
  theme_apa() +
  theme(legend.position = "bottom") +
  xlab("Questions") + ylab("N"))
## Warning: Ignoring unknown parameters: binwidth, bins, pad
ggplotly(ggplot(gather(data_cleaned_scale[,31:42]), aes(x=value)) + 
  geom_histogram(stat = "count") + facet_wrap(~key, scales = 'free_x') +
  scale_color_grey() +
  theme_apa() +
  theme(legend.position = "bottom") +
  xlab("Questions") + ylab("N"))
## Warning: Ignoring unknown parameters: binwidth, bins, pad
rm(ggplot_data)

Items 1, 4, 6, 10 seems to have ceiling or skewed distribution. It is then recommended to check if extreme values of each item (values 1 and 6) have more than 20% of the responses. We therefore check for skewness and kurtosis. (Garin, 2014 ; Benzina, 2019)

Reminder :
The skewness coefficient is a third-order measure of the central tendency of the sample. A negative coefficient translate to a biased distribution toward the right. A positive coefficient translate to a biased distribution toward the left. \[\gamma_{X} = E\left[\left(\frac{X - \mu}{\sigma}\right)^3\right] \]

The kurtosis coefficient is a 4th order measure of the central tendency of the sample. It translate to how fast the distribution goes back to 0 (if it does). Therefore, it shows if there is more extreme values than it should. A high kurtosis translate to high number of extreme values.
\[Kurt[X] = E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right] \]

# Compute skewness and Kurtosis
item = colnames(nflex_inv)
kurt_skew = as.tibble(round(skewness(nflex_inv), 2)) %>% 
  mutate(kurtosis = round(kurtosis(nflex_inv), 2)) %>% 
  cbind(item) %>% 
  rename(skewness = value)
rm(item)
kurt_skew[, c(3,1,2)]
rm(kurt_skew)



References

Benzina, N. (2019). Évaluation de la flexibilité cognitive dans le trouble obsessionnel compulsif : Étude de la validité de deux auto-questionnaires comparés à une tâche expérimentale. Université de Rouen.


The table below shows items with more than 20% of extreme values.

# This script check what item got more than 20% of response in the extreme values. It then create a new dataframe containing only the items non extreme. 

## Create dataframe with upper item and lower item frequencies of response
frequencies  = data.frame(item = character(10), upper = numeric(10), lower = numeric(10))
for(i in 1:10){
  upper = length(which(nflex_inv[i]==6))/nrow(nflex_inv)
  lower = length(which(nflex_inv[i]==1))/nrow(nflex_inv)
  frequencies$item[i] = paste0("nflex_", as.character(i))
  frequencies$upper[i] = upper
  frequencies$lower[i] = lower
}
frequencies
rm(upper, lower, i)

## Create a list used to delete item that have more than 20% of extreme values.
list = c()
for(i in 1:10){
  if(frequencies[i,2]>.20 | frequencies[i,3]>.20 ){
    list = c(list, i)
    }
}
# Create a dataframe with the remaining items of the General Cognitive Flexibility (GCF)
nflex_inv_reduct = nflex_inv[,-list]
rm(list,i)


b. Summary



Items 1, 4, 6, 10 are in fact not well suited. We deleted them from further analysis.The next table shows the flexibiliy descriptive stats for the remaining items

# Shows descriptive statistique for remaining items of the GCF
stargazer(as.data.frame(nflex_inv_reduct), type="text")
## 
## ======================================================
## Statistic  N  Mean  St. Dev. Min Pctl(25) Pctl(75) Max
## ------------------------------------------------------
## nflex_2   350 4.491  0.969    2     4        5      6 
## nflex_3   350 3.529  1.319    1     3        4      6 
## nflex_5   350 3.866  1.301    1     3        5      6 
## nflex_7   350 3.363  1.245    1     2        4      6 
## nflex_8   350 4.254  1.131    1     4        5      6 
## nflex_9   350 3.171  1.335    1     2        4      6 
## ------------------------------------------------------


c. correlation



We show here the item’s correlation. We used polychoric correlation as it is suited for ordinal values with skewed distribution. The polychoric correlation is a measure of the pairwise association of ordinal variables. We smooth the correlation to avoid any non positive definite measure (Debelak et Tran, 2016)

Références

Debelak, R., & Tran, U. S. (2016). Comparing the Effects of Different Smoothing Algorithms on the Assessment of Dimensionality of Ordered Categorical Items with Parallel Analysis. PloS one, 11(2), e0148143. https://doi.org/10.1371/journal.pone.0148143

D.L. Knol and JMF ten Berge (1989) Least squares approximation of an improper correlation matrix by a proper one. Psychometrika, 54, 53-61.

# Compute the polychoric correlation (rho), smooth it by eigenvalue decomposition
poly_nflex_inv_reduct = polychoric(nflex_inv_reduct)
poly_nflex_inv_reduct = poly_nflex_inv_reduct$rho
poly_nflex_inv_reduct = cor.smooth(poly_nflex_inv_reduct, eig.tol=10^-12)
# Shows correlation in a matrix
correlation = poly_nflex_inv_reduct
correlation[upper.tri(correlation)] <- NA
stargazer(correlation, title="Polychloric correlation matrix", type="text")
## 
## Polychloric correlation matrix
## =======================================================
##         nflex_2 nflex_3 nflex_5 nflex_7 nflex_8 nflex_9
## -------------------------------------------------------
## nflex_2    1                                           
## nflex_3 -0.220     1                                   
## nflex_5 -0.164   0.243     1                           
## nflex_7 -0.279   0.308   0.280     1                   
## nflex_8  0.216  -0.034   0.047  -0.153     1           
## nflex_9 -0.056   0.145   0.084   0.419  -0.104     1   
## -------------------------------------------------------
rm(correlation)


IV. Exploratory Factorial Analysis

1. EFA’s assumption



# Compute bartlett, KMO and check if we got an identity matrix
cortest.bartlett(R = poly_nflex_inv_reduct, n = nrow(nflex_inv_reduct))
## $chisq
## [1] 205.5948
## 
## $p.value
## [1] 0.00000000000000000000000000000000001546974
## 
## $df
## [1] 15
KMO(poly_nflex_inv_reduct)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = poly_nflex_inv_reduct)
## Overall MSA =  0.63
## MSA for each item = 
## nflex_2 nflex_3 nflex_5 nflex_7 nflex_8 nflex_9 
##    0.65    0.73    0.66    0.61    0.57    0.57
det(poly_nflex_inv_reduct)
## [1] 0.5521594

KMO’s measure of sample adequacy test

\[MSA_{nflex} = .63 \space (MSA_{min} = .50)\]

Bartlett Sphericity test
\[\chi^2(6) = 205.59,\space p < .001.\]
EFA is appropriate.


Det(cor(A))

\[det(A) = 0.55,\space non-identity\space matrix\]
As shown byt the KMO test, items 8 and 9 seems to have a really low KMO. We therefore deleted them from further analysis, computed the polychoric correlation of this new dataset and tested again the assumptions :

# Compute new dataset of nflex without item 8 and 9
nflex_inv_reduct_msa = nflex_inv_reduct[,-c(5,6)]

#Compute polychoric correlation with the item 8 and 9 removed
poly_nflex_inv_reduct_msa = polychoric(nflex_inv_reduct_msa)
## Warning in matpLower(x, nvar, gminx, gmaxx, gminy, gmaxy): 5 cells were adjusted
## for 0 values using the correction for continuity. Examine your data carefully.
poly_nflex_inv_reduct_msa = poly_nflex_inv_reduct_msa$rho
poly_nflex_inv_reduct_msa = cor.smooth(poly_nflex_inv_reduct_msa, eig.tol=10^-12)

# Compute bartlett, KMO and check if we got an identity matrix
cortest.bartlett(R = poly_nflex_inv_reduct_msa, n = nrow(nflex_inv_reduct_msa))
## $chisq
## [1] 110.5166
## 
## $p.value
## [1] 0.000000000000000000001588914
## 
## $df
## [1] 6
KMO(poly_nflex_inv_reduct_msa)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = poly_nflex_inv_reduct_msa)
## Overall MSA =  0.68
## MSA for each item = 
## nflex_2 nflex_3 nflex_5 nflex_7 
##    0.70    0.69    0.70    0.65
det(poly_nflex_inv_reduct_msa)
## [1] 0.7271338
rm(poly_nflex_inv_reduct)

KMO’s measure of sample adequacy test

\[MSA_{nflex} = .68 \space (MSA_{min} = .50)\]

Bartlett Sphericity test
\[\chi^2(6) = 110.51,\space p < .001.\]
EFA is appropriate.


Det(cor(A))

\[det(A) = 0.73,\space non-identity\space matrix\]
We need to test if it’s recommanded and possible to do an EFA. The Bartlett Sphericity Test compare our data matrix with matrix randomly generated by those same date and then check if those data are linked. The measure sample adequacy test of Kaiser-Meyer and Olin check sample adequacy for each variable in the model and for the complete.

Reminder : Common variance (variance shared between an item and other items), specific variance (variance of the item that isnt shared with other items) et residual variance (error associated to the item)
Reminder 2 : Because we have ordinal data, we need to use the polychoric correlation matrix in the KMO and Bartlett tests and not the raw dataset, otherwise it will automatically use the pearson correlation matrix ! Reminder 3 : MSA rise as sample size rise, mean correlation rise, number of variables rise et land number of factors drops. We used the modified MSA (Kaiser et Rice, 1974).

Références

Kaiser, H. F., & Rice, J. (1974). Little Jiffy, Mark Iv. Educational and Psychological Measurement, 34(1), 111‑117. https://doi.org/10.1177/001316447403400115

Voir pour Bartlett Pour le déterminer, c’est inhérant aux méthodes d’analyse (pas de matrice identitaire, sans quoi, on ne peut pas calculer les matrices inverses)

Tinsley, H. E. A., & Tinsley, D. J. (1987). Uses of factor analysis in counseling psychology research. Journal of Counseling Psychology, 34(4), 414‑424. https://doi.org/0022-0167.34.4.414



Compute R² of items

# Compute multiple regression for R² 
lm1 = lm(nflex_2~nflex_3+nflex_5+nflex_7, data=nflex_inv_reduct_msa)
#ConditionRegression(lm1)
lm2 = lm(nflex_3~nflex_2+nflex_5+nflex_7, data=nflex_inv_reduct_msa)
#ConditionRegression(lm2)
lm3 = lm(nflex_5~nflex_3+nflex_2+nflex_7, data=nflex_inv_reduct_msa)
#ConditionRegression(lm3)
lm4 = lm(nflex_7~nflex_3+nflex_5+nflex_2, data=nflex_inv_reduct_msa)
#ConditionRegression(lm4)

# summary(lm1)
# summary(lm2)
# summary(lm3)
# summary(lm4)

#R² obtained with those regressions 
c(.09796, .1277,.1005,.1609)
## [1] 0.09796 0.12770 0.10050 0.16090

2. Polychoric exploratory factorial analysis

We used the parallel analysis with polychoric correlation because we have ordinal data, skewed items and non normality:



Référence

Buja, A., & Eyuboglu, N. (1992). Remarks on Parallel Analysis. Multivariate Behavioral Research, 27(4), 509-540. http://doi.org/10.1207/s15327906mbr2704_2

Devlin, S. J., Gnanadesikan, R., & Kettenring, J. R. (1981). Robust estimation of dispersion matrices and principal components. Journal of the American Statistical Association, 76, 354-362. http://doi.org/10.1080/01621459.1981.10477654

ten Berge, J. M. F., & Kiers, H. A. L. (1991). A numerical approach to the approximate and the exact minimum rank of a covariance matrix. Psychometrika, 56(2), 309-315. http://doi.org/10.1007/BF02294464

Timmerman, M. E., & Lorenzo-Seva, U. (2011). Dimensionality assessment of ordered polytomous items with parallel analysis. Psychological Methods, 16(2), 209-220. http://doi.org/10.1037/a0023353

Ford, J. Kevin, Robert C. MacCALLUM, et Marianne Tait. « The Application of Exploratory Factor Analysis in Applied Psychology: A Critical Review and Analysis ». Personnel Psychology 39, nᵒ 2 (juin 1986): 291‑314. https://doi.org/10.1111/j.1744-6570.1986.tb00583.x. (critère du 0.40)

Garrido, L. E., Abad, F. J., & Ponsoda, V. (2013). A new look at Horn’s parallel analysis with ordinal variables. Psychological Methods, 18(4), 454‑474. PubMed. https://doi.org/10.1037/a0030005

Tinsley, H. E. A., & Tinsley, D. J. (1987). Uses of factor analysis in counseling psychology research. Journal of Counseling Psychology, 34(4), 414‑424. https://doi.org/0022-0167.34.4.414

We then used the efa wiith least squares algorithm (principal axis factor analysis), with number of factor indicated by the parallel analysis, and a varimax rotation. We set the factor loadings limit to .30 (Peterson, 2000). Anything below is considered too small.

Référence

Ford, J. K., MacCALLUM, R. C., & Tait, M. (1986). The application of exploratory factor analysis in applied psychology : A critical review and analysis. Personnel Psychology, 39(2), 291‑314. https://doi.org/10.1111/j.1744-6570.1986.tb00583.x

Lee, S.-Y., Poon, W.-Y., & Bentler, P. M. (1995). A two-stage estimation of structural equation models with continuous and polytomous variables. British Journal of Mathematical and Statistical Psychology, 48(2), 339‑358. https://doi.org/10.1111/j.2044-8317.1995.tb01067.x

Baglin, J. (2014). Improving Your Exploratory Factor Analysis for Ordinal Data : A Demonstration Using FACTOR. Practical Assessment, Research, and Evaluation, 19(5), 1‑16. https://doi.org/10.7275/dsep-4220

Peterson, R. A. (2000). A Meta-Analysis of Variance Accounted for and Factor Loadings in Exploratory Factor Analysis. Marketing Letters, 11(3), 261‑275.

Tinsley, H. E. A., & Tinsley, D. J. (1987). Uses of factor analysis in counseling psychology research. Journal of Counseling Psychology, 34(4), 414‑424. https://doi.org/0022-0167.34.4.414\

# Compute the number of factors
pa = fa.parallel(poly_nflex_inv_reduct_msa, fm="pa", fa="fa", main = "Scree Plot", se.bars = T, n.obs = nrow(nflex_inv_reduct_msa))

## Parallel analysis suggests that the number of factors =  1  and the number of components =  NA
vss(poly_nflex_inv_reduct_msa)
## n.obs was not specified and was arbitrarily set to 1000.  This only affects the chi square values.

## 
## Very Simple Structure
## Call: vss(x = poly_nflex_inv_reduct_msa)
## VSS complexity 1 achieves a maximimum of 0.53  with  1  factors
## VSS complexity 2 achieves a maximimum of 0.57  with  2  factors
## 
## The Velicer MAP achieves a minimum of NA  with  1  factors 
## BIC achieves a minimum of  NA  with  1  factors
## Sample Size adjusted BIC achieves a minimum of  NA  with  1  factors
## 
## Statistics by number of factors 
##   vss1 vss2  map dof           chisq prob sqresid  fit  RMSEA BIC SABIC complex
## 1 0.53 0.00 0.11   2 2.0188668274157 0.36     2.2 0.53 0.0029 -12  -5.4     1.0
## 2 0.35 0.57 0.39  -1 0.0000000000071   NA     2.1 0.57     NA  NA    NA     1.6
## 3 0.34 0.56 1.00  -3 0.0000000000000   NA     2.0 0.57     NA  NA    NA     1.8
## 4 0.34 0.56   NA  -4 0.0000000000000   NA     2.0 0.57     NA  NA    NA     1.8
##             eChisq        SRMR eCRMS eBIC
## 1 2.32087835009383 0.013907068 0.024  -11
## 2 0.00000000000776 0.000000025    NA   NA
## 3 0.00000000000011 0.000000003    NA   NA
## 4 0.00000000000011 0.000000003    NA   NA
# EFA with polychoric correlation and R² from multiple regression as original communalitites
fit_nflex = fa(nflex_inv_reduct_msa, cor="poly", nfactors = pa$nfact, fm="pa", rotate="none", residuals = T, correct=F, SMC=c(.09796, .1277,.1005,.1609))
fa.diagram(fit_nflex, sort = F, errors = T, labels = T, digits = 2, cut=.39)

fa.plot(fit_nflex) #plot the loadings of each factors

fit_nflex
## Factor Analysis using method =  pa
## Call: fa(r = nflex_inv_reduct_msa, nfactors = pa$nfact, rotate = "none", 
##     residuals = T, SMC = c(0.09796, 0.1277, 0.1005, 0.1609), 
##     fm = "pa", cor = "poly", correct = F)
## Standardized loadings (pattern matrix) based upon correlation matrix
##           PA1   h2   u2 com
## nflex_2 -0.45 0.21 0.79   1
## nflex_3  0.53 0.28 0.72   1
## nflex_5  0.44 0.20 0.80   1
## nflex_7  0.63 0.40 0.60   1
## 
##                 PA1
## SS loadings    1.08
## Proportion Var 0.27
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  6  and the objective function was  0.35 with Chi Square of  120.78
## The degrees of freedom for the model are 2  and the objective function was  0 
## 
## The root mean square of the residuals (RMSR) is  0.01 
## The df corrected root mean square of the residuals is  0.02 
## 
## The harmonic number of observations is  350 with the empirical chi square  0.49  with prob <  0.78 
## The total number of observations was  350  with Likelihood Chi Square =  0.44  with prob <  0.8 
## 
## Tucker Lewis Index of factoring reliability =  1.041
## RMSEA index =  0  and the 90 % confidence intervals are  0 0.066
## BIC =  -11.28
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    PA1
## Correlation of (regression) scores with factors   0.78
## Multiple R square of scores with factors          0.61
## Minimum correlation of possible factor scores     0.21
# Reliability Test
fit_omega_nflex = omega(m=nflex_inv_reduct_msa, poly=T, nfactors = 1, fm = "pa", rotation="none", plot = T)
## Warning in matpLower(x, nvar, gminx, gmaxx, gminy, gmaxy): 5 cells were adjusted
## for 0 values using the correction for continuity. Examine your data carefully.
## Omega_h for 1 factor is not meaningful, just omega_t
## Warning in schmid(m, nfactors, fm, digits, rotate = rotate, n.obs = n.obs, :
## Omega_h and Omega_asymptotic are not meaningful with one factor
## Warning in cov2cor(t(w) %*% r %*% w): diag(.) had 0 or NA entries; non-finite
## result is doubtful


Fit index of the EFA shows how well the model is doing compare to a base model (all item in one factor). It also shows the fitting of items on the factors. We need to check for the \(\chi²\) test (results close to 0 shows a perfect fit), the root meant square residuals (range between 0 and 1), the standardized root mean residual (SRMR), the Tucker Lewis index (TLI).

The \(\chi²\) test is used for hypothesis testing to evaluate the appropriateness of a structural equation model. It checks if the sample covariance matrix \(S\) is equal to the model-implied covariance matrix \(\Sigma (\theta)\) (Null hypothesis :\(S-\Sigma (\hat{\theta})=0\)). The \(\chi²\) test is sensitive to number of observation. He will always be significant with more than 400 observations. (Schermelleh-Engel et al., 2003).

Because exact fit never occurs, the null hypothesis of exact fit is replaced by the null hypothesis of “close-fit”. Thus, the RMSEA is a measure of approximate fit in the population and is therefore concerned with the discrepancy due to approximation. (Schermelleh-Engel et al., 2003). Steiger (1990) and Browne and Cudeck (1993) define a “close-fit” as a \(RMSEA \leq .05\), an adequate fit as a \(.05\leq RMSEA \leq.08\), a mediocre fit as \(.08\leq RMSEA \leq.10\) and anythin else as not acceptable. For Hu and Bentler (1999), a cutoff of .06 is appropriate. RMSEA is relatively iundependent of sample size and favors parsimonious models (Browne and Cudeck, 1993 ; Kaplan, 2000).

The standardized root mean square residual (SRMR) was developped to overcome the problems that comes along with the root mean residual, which is dependant on the siezs of the variance and covariances of the observed variables.A value of 0 indicates a perfect fit. But there is not real cutoff, as this value is still dependent of variance and covariances of the observed variables (even if less than for the RMR). Hu and Bentler (1995) suggested that \(SRMR \leq .05\) indicate a good fit and \(.05\leq SRMR \leq.10\) indicates an acceptable fit.

The Turker Lewis Index, also known as NonNormed Fit index (Bentler and Bonnett, 1980) ranges from 0 to 1 but can sometime go beyond 1, as it is nonnormed. Less restrictive models (more complexe) are penalized while more persimonious models are rewarded by an increase in the fit index. This index is one of the less affected by sample size (Bentler, 1990 ; Hu et Bentler, 1998 ; Schermelleh-Engel et al., 2003)

Références :

Sharma, S., Mukherjee, S., Kumar, A., & Dillon, W. R. (2005). A simulation study to investigate the use of cutoff values for assessing model fit in covariance structure models. Journal of Business Research, 58(7), 935‑943. https://doi.org/10.1016/j.jbusres.2003.10.007\

Bagozzi, R. R., & Yi, Y. (1988). On the evaluation of structural equation models. Journal of the Academy of Marketing Science, 16(1), 74‑94. https://doi.org/0092-0703/88 / 1601-0074

Hu, L., & Bentler, P. M. (1999). Cutoff criteria for fit indexes in covariance structure analysis : Conventional criteria versus new alternatives. Structural Equation Modeling: A Multidisciplinary Journal, 6(1), 1‑55. https://doi.org/10.1080/10705519909540118\

Schermelleh-Engel, K., Moosbrugger, H., & Müller, H. (2003). Evaluating the Fit of Structural Equation Models : Tests of Significance and Descriptive Goodness-of-Fit Measures. Methods of Psychological Research, 8(2), 23‑74.

knitr::kable(data.frame("alpha" = fit_omega_nflex$alpha, "Omega"= fit_omega_nflex$omega.tot)) %>% kableExtra::kable_styling("striped", full_width = F, position="left")
## Warning in kableExtra::kable_styling(., "striped", full_width = F, position =
## "left"): Please specify format in kable. kableExtra can customize either HTML or
## LaTeX outputs. See https://haozhu233.github.io/kableExtra/ for details.
alpha Omega
0.5700386 0.5748066

The Omega is an estimation of the general factor saturation in a scale. The Omega asymptotic coefficient can be compare to a Guttman \(\lambda^6\) (or the Cronbach \(\alpha\)).

Références

Revelle and Zinbarg (2009)

Zinbarg, R. E., Revelle, W., Yovel, I., & Li, W. (2005). Cronbach’s α, Revelle’s β, and Mcdonald’s ωH : Their relations with each other and two alternative conceptualizations of reliability. Psychometrika, 70(1), 123‑133. https://doi.org/10.1007/s11336-003-0974-7

Trizano-Hermosilla, I., & Alvarado, J. M. (2016). Best Alternatives to Cronbach’s Alpha Reliability in Realistic Conditions : Congeneric and Asymmetrical Measurements. Frontiers in Psychology, 7(769). https://doi.org/10.3389/fpsyg.2016.00769

V. Confirmatory Factor Analysis

1. Creation of cfa models

Create models used for the cfa. It implies the 3 scales already validated (cfs, attention switching sub-set, cognitive rigidity sub-set).

#Create the lavaan models
model_cfs = "flexibilite =~ cfs_1+cfs_2+cfs_3+cfs_4+cfs_5+cfs_6+cfs_7+cfs_8+cfs_9+cfs_10+cfs_11+cfs_12"

model_aq = "attention =~ att_switch_1+att_switch_2+att_switch_3+att_switch_4+att_switch_5+att_switch_6+att_switch_7+att_switch_8+att_switch_9+att_switch_10"

model_rtc = "rigidite =~ cognitive_rigidity_1+cognitive_rigidity_2+cognitive_rigidity_3+cognitive_rigidity_4"


2. CFA for the cognitive flexibility scale

cfa_data = data_cleaned_scale[,31:42]
#Compute CFA
cfa_cfs = cfa(model_cfs, data=cfa_data, ordered = c("cfs_1","cfs_2","cfs_3","cfs_4","cfs_5","cfs_6","cfs_7","cfs_8","cfs_9","cfs_10","cfs_11","cfs_12"))
summary(cfa_cfs, fit.measures=T)
## lavaan 0.6-6 ended normally after 16 iterations
## 
##   Estimator                                       DWLS
##   Optimization method                           NLMINB
##   Number of free parameters                         71
##                                                       
##   Number of observations                           350
##                                                       
## Model Test User Model:
##                                               Standard      Robust
##   Test Statistic                               307.456     385.463
##   Degrees of freedom                                54          54
##   P-value (Chi-square)                           0.000       0.000
##   Scaling correction factor                                  0.820
##   Shift parameter                                           10.573
##        simple second-order correction                             
## 
## Model Test Baseline Model:
## 
##   Test statistic                              2398.385    1527.505
##   Degrees of freedom                                66          66
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.596
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.891       0.773
##   Tucker-Lewis Index (TLI)                       0.867       0.723
##                                                                   
##   Robust Comparative Fit Index (CFI)                            NA
##   Robust Tucker-Lewis Index (TLI)                               NA
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.116       0.133
##   90 Percent confidence interval - lower         0.104       0.120
##   90 Percent confidence interval - upper         0.129       0.145
##   P-value RMSEA <= 0.05                          0.000       0.000
##                                                                   
##   Robust RMSEA                                                  NA
##   90 Percent confidence interval - lower                        NA
##   90 Percent confidence interval - upper                        NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.093       0.093
## 
## Parameter Estimates:
## 
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model        Unstructured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   flexibilite =~                                      
##     cfs_1             1.000                           
##     cfs_2             0.641    0.087    7.331    0.000
##     cfs_3             0.726    0.079    9.216    0.000
##     cfs_4             0.872    0.076   11.541    0.000
##     cfs_5             0.641    0.087    7.353    0.000
##     cfs_6             0.966    0.079   12.199    0.000
##     cfs_7             0.896    0.084   10.610    0.000
##     cfs_8             0.750    0.080    9.356    0.000
##     cfs_9             0.623    0.075    8.319    0.000
##     cfs_10            0.804    0.079   10.138    0.000
##     cfs_11            0.783    0.080    9.841    0.000
##     cfs_12            1.001    0.081   12.326    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .cfs_1             0.000                           
##    .cfs_2             0.000                           
##    .cfs_3             0.000                           
##    .cfs_4             0.000                           
##    .cfs_5             0.000                           
##    .cfs_6             0.000                           
##    .cfs_7             0.000                           
##    .cfs_8             0.000                           
##    .cfs_9             0.000                           
##    .cfs_10            0.000                           
##    .cfs_11            0.000                           
##    .cfs_12            0.000                           
##     flexibilite       0.000                           
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     cfs_1|t1         -2.764    0.326   -8.469    0.000
##     cfs_1|t2         -1.998    0.148  -13.538    0.000
##     cfs_1|t3         -1.133    0.085  -13.277    0.000
##     cfs_1|t4         -0.144    0.067   -2.134    0.033
##     cfs_1|t5          0.652    0.073    8.991    0.000
##     cfs_2|t1         -1.948    0.142  -13.758    0.000
##     cfs_2|t2         -1.350    0.095  -14.240    0.000
##     cfs_2|t3         -0.541    0.071   -7.642    0.000
##     cfs_2|t4          0.253    0.068    3.732    0.000
##     cfs_2|t5          0.994    0.081   12.330    0.000
##     cfs_3|t1         -1.579    0.108  -14.572    0.000
##     cfs_3|t2         -1.189    0.088  -13.588    0.000
##     cfs_3|t3         -0.533    0.071   -7.538    0.000
##     cfs_3|t4          0.086    0.067    1.281    0.200
##     cfs_3|t5          0.831    0.076   10.906    0.000
##     cfs_4|t1         -2.529    0.248  -10.207    0.000
##     cfs_4|t2         -1.487    0.102  -14.521    0.000
##     cfs_4|t3         -0.782    0.075  -10.411    0.000
##     cfs_4|t4          0.508    0.070    7.223    0.000
##     cfs_4|t5          1.386    0.097   14.339    0.000
##     cfs_5|t1         -2.384    0.212  -11.250    0.000
##     cfs_5|t2         -1.718    0.119  -14.445    0.000
##     cfs_5|t3         -0.831    0.076  -10.906    0.000
##     cfs_5|t4         -0.007    0.067   -0.107    0.915
##     cfs_5|t5          0.971    0.080   12.147    0.000
##     cfs_6|t1         -2.117    0.164  -12.936    0.000
##     cfs_6|t2         -1.405    0.098  -14.384    0.000
##     cfs_6|t3         -0.187    0.068   -2.774    0.006
##     cfs_6|t4          0.583    0.071    8.163    0.000
##     cfs_7|t1         -2.764    0.326   -8.469    0.000
##     cfs_7|t2         -1.902    0.136  -13.937    0.000
##     cfs_7|t3         -1.068    0.083  -12.860    0.000
##     cfs_7|t4          0.268    0.068    3.945    0.000
##     cfs_7|t5          1.219    0.089   13.735    0.000
##     cfs_8|t1         -2.117    0.164  -12.936    0.000
##     cfs_8|t2         -1.631    0.112  -14.552    0.000
##     cfs_8|t3         -0.716    0.074   -9.706    0.000
##     cfs_8|t4          0.328    0.068    4.795    0.000
##     cfs_8|t5          1.068    0.083   12.860    0.000
##     cfs_9|t1         -2.276    0.190  -11.975    0.000
##     cfs_9|t2         -1.659    0.114  -14.528    0.000
##     cfs_9|t3         -0.744    0.074  -10.010    0.000
##     cfs_9|t4          0.476    0.070    6.803    0.000
##     cfs_9|t5          1.133    0.085   13.277    0.000
##     cfs_10|t1        -1.948    0.142  -13.758    0.000
##     cfs_10|t2        -1.425    0.099  -14.425    0.000
##     cfs_10|t3        -0.744    0.074  -10.010    0.000
##     cfs_10|t4         0.108    0.067    1.601    0.109
##     cfs_10|t5         0.971    0.080   12.147    0.000
##     cfs_11|t1        -2.764    0.326   -8.469    0.000
##     cfs_11|t2        -2.189    0.175  -12.516    0.000
##     cfs_11|t3        -1.605    0.110  -14.566    0.000
##     cfs_11|t4        -0.444    0.070   -6.382    0.000
##     cfs_11|t5         0.305    0.068    4.477    0.000
##     cfs_12|t1        -1.659    0.114  -14.528    0.000
##     cfs_12|t2        -1.147    0.086  -13.357    0.000
##     cfs_12|t3        -0.328    0.068   -4.795    0.000
##     cfs_12|t4         0.583    0.071    8.163    0.000
##     cfs_12|t5         1.298    0.092   14.068    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .cfs_1             0.599                           
##    .cfs_2             0.835                           
##    .cfs_3             0.788                           
##    .cfs_4             0.695                           
##    .cfs_5             0.835                           
##    .cfs_6             0.625                           
##    .cfs_7             0.678                           
##    .cfs_8             0.774                           
##    .cfs_9             0.844                           
##    .cfs_10            0.741                           
##    .cfs_11            0.754                           
##    .cfs_12            0.598                           
##     flexibilite       0.401    0.047    8.529    0.000
## 
## Scales y*:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     cfs_1             1.000                           
##     cfs_2             1.000                           
##     cfs_3             1.000                           
##     cfs_4             1.000                           
##     cfs_5             1.000                           
##     cfs_6             1.000                           
##     cfs_7             1.000                           
##     cfs_8             1.000                           
##     cfs_9             1.000                           
##     cfs_10            1.000                           
##     cfs_11            1.000                           
##     cfs_12            1.000
fitmeasures(cfa_cfs)
##                          npar                          fmin 
##                        71.000                         0.439 
##                         chisq                            df 
##                       307.456                        54.000 
##                        pvalue                  chisq.scaled 
##                         0.000                       385.463 
##                     df.scaled                 pvalue.scaled 
##                        54.000                         0.000 
##          chisq.scaling.factor                baseline.chisq 
##                         0.820                      2398.385 
##                   baseline.df               baseline.pvalue 
##                        66.000                         0.000 
##         baseline.chisq.scaled            baseline.df.scaled 
##                      1527.505                        66.000 
##        baseline.pvalue.scaled baseline.chisq.scaling.factor 
##                         0.000                         1.596 
##                           cfi                           tli 
##                         0.891                         0.867 
##                          nnfi                           rfi 
##                         0.867                         0.843 
##                           nfi                          pnfi 
##                         0.872                         0.713 
##                           ifi                           rni 
##                         0.892                         0.891 
##                    cfi.scaled                    tli.scaled 
##                         0.773                         0.723 
##                    cfi.robust                    tli.robust 
##                            NA                            NA 
##                   nnfi.scaled                   nnfi.robust 
##                         0.723                            NA 
##                    rfi.scaled                    nfi.scaled 
##                         0.692                         0.748 
##                    ifi.scaled                    rni.scaled 
##                         0.775                         0.773 
##                    rni.robust                         rmsea 
##                            NA                         0.116 
##                rmsea.ci.lower                rmsea.ci.upper 
##                         0.104                         0.129 
##                  rmsea.pvalue                  rmsea.scaled 
##                         0.000                         0.133 
##         rmsea.ci.lower.scaled         rmsea.ci.upper.scaled 
##                         0.120                         0.145 
##           rmsea.pvalue.scaled                  rmsea.robust 
##                         0.000                            NA 
##         rmsea.ci.lower.robust         rmsea.ci.upper.robust 
##                            NA                            NA 
##           rmsea.pvalue.robust                           rmr 
##                            NA                         0.087 
##                    rmr_nomean                          srmr 
##                         0.093                         0.093 
##                  srmr_bentler           srmr_bentler_nomean 
##                         0.087                         0.093 
##                          crmr                   crmr_nomean 
##                         0.093                         0.101 
##                    srmr_mplus             srmr_mplus_nomean 
##                            NA                            NA 
##                         cn_05                         cn_01 
##                        82.903                        93.023 
##                           gfi                          agfi 
##                         0.968                         0.926 
##                          pgfi                           mfi 
##                         0.418                         0.696
lavaanPlot(model = cfa_cfs)
lavaan.diagram(cfa_cfs)

# test with efa stucture
##compute polychoric correlation and assumptions
poly = polychoric(data_cleaned_scale[,31:42])
## Warning in matpLower(x, nvar, gminx, gmaxx, gminy, gmaxy): 66 cells were
## adjusted for 0 values using the correction for continuity. Examine your data
## carefully.
poly = poly$rho
cortest.bartlett(R = poly, n = nrow(data_cleaned_scale))
## $chisq
## [1] 921.8956
## 
## $p.value
## [1] 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004578909
## 
## $df
## [1] 66
KMO(poly)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = poly)
## Overall MSA =  0.81
## MSA for each item = 
##  cfs_1  cfs_2  cfs_3  cfs_4  cfs_5  cfs_6  cfs_7  cfs_8  cfs_9 cfs_10 cfs_11 
##   0.86   0.79   0.79   0.81   0.85   0.79   0.76   0.69   0.77   0.86   0.84 
## cfs_12 
##   0.87
det(poly)
## [1] 0.06865705
##Compute parallel analysis for number of factors
pa = fa.parallel(poly, fm="pa", fa="fa", main="Scree Plot", se.bars=T, n.obs=nrow(data_cleaned_scale))
## Parallel analysis suggests that the number of factors =  4  and the number of components =  NA
# EFA with polychoric correlation and R² from multiple regression as original communalitites
fit_nflex = fa(data_cleaned_scale[,31:42], cor="poly", nfactors=pa$nfact, fm="pa", rotate="oblimin", residuals=T, correct=T, SMC=T)
## Warning in matpLower(x, nvar, gminx, gmaxx, gminy, gmaxy): 66 cells were
## adjusted for 0 values using the correction for continuity. Examine your data
## carefully.
## maximum iteration exceeded
fa.diagram(fit_nflex, sort=T, errors=T, labels=T, digits=2)
fa.plot(fit_nflex) #plot the loadings of each factors
fit_nflex
## Factor Analysis using method =  pa
## Call: fa(r = data_cleaned_scale[, 31:42], nfactors = pa$nfact, rotate = "oblimin", 
##     residuals = T, SMC = T, fm = "pa", cor = "poly", correct = T)
## Standardized loadings (pattern matrix) based upon correlation matrix
##          PA1   PA3   PA2   PA4   h2   u2 com
## cfs_1   0.52  0.10  0.06  0.10 0.40 0.60 1.2
## cfs_2   0.26  0.48 -0.17 -0.05 0.34 0.66 1.8
## cfs_3  -0.13  0.70  0.09  0.05 0.50 0.50 1.1
## cfs_4   0.03  0.01  0.03  0.78 0.66 0.34 1.0
## cfs_5   0.26  0.41  0.05 -0.17 0.27 0.73 2.1
## cfs_6   0.54  0.10 -0.11  0.26 0.51 0.49 1.6
## cfs_7   0.27  0.02  0.49 -0.01 0.37 0.63 1.6
## cfs_8  -0.02  0.04  0.78  0.04 0.65 0.35 1.0
## cfs_9   0.48 -0.12  0.13  0.03 0.26 0.74 1.3
## cfs_10  0.06  0.43  0.11  0.13 0.31 0.69 1.4
## cfs_11  0.51 -0.04  0.22  0.00 0.34 0.66 1.4
## cfs_12  0.21  0.35  0.02  0.19 0.35 0.65 2.3
## 
##                        PA1  PA3  PA2  PA4
## SS loadings           1.56 1.39 1.07 0.92
## Proportion Var        0.13 0.12 0.09 0.08
## Cumulative Var        0.13 0.25 0.34 0.41
## Proportion Explained  0.32 0.28 0.22 0.19
## Cumulative Proportion 0.32 0.60 0.81 1.00
## 
##  With factor correlations of 
##      PA1  PA3  PA2  PA4
## PA1 1.00 0.35 0.22 0.42
## PA3 0.35 1.00 0.22 0.34
## PA2 0.22 0.22 1.00 0.31
## PA4 0.42 0.34 0.31 1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 4 factors are sufficient.
## 
## The degrees of freedom for the null model are  66  and the objective function was  2.41 with Chi Square of  829.34
## The degrees of freedom for the model are 24  and the objective function was  0.08 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  350 with the empirical chi square  23.99  with prob <  0.46 
## The total number of observations was  350  with Likelihood Chi Square =  28.22  with prob <  0.25 
## 
## Tucker Lewis Index of factoring reliability =  0.985
## RMSEA index =  0.022  and the 90 % confidence intervals are  0 0.051
## BIC =  -112.37
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    PA1  PA3  PA2  PA4
## Correlation of (regression) scores with factors   0.84 0.84 0.84 0.84
## Multiple R square of scores with factors          0.71 0.70 0.71 0.71
## Minimum correlation of possible factor scores     0.42 0.39 0.42 0.41
#Check if a general factor can explain the 3 factors
bassAckward(poly, cor="poly", nfactors = c(1,3), fm = "pa")
## 
## Call: bassAckward(r = poly, nfactors = c(1, 3), fm = "pa", cor = "poly")
## 
##  1 F1
##  2 F1
## Use print with the short = FALSE option to see the correlations, or use the summary command.
##Compute alpha and omega for internal reliability
psych::alpha(data_cleaned_scale[,31:42])
## 
## Reliability analysis   
## Call: psych::alpha(x = data_cleaned_scale[, 31:42])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.76      0.77    0.79      0.22 3.3 0.019  4.3 0.59     0.21
## 
##  lower alpha upper     95% confidence boundaries
## 0.73 0.76 0.8 
## 
##  Reliability if an item is dropped:
##        raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## cfs_1       0.74      0.74    0.76      0.21 2.8    0.021 0.0120  0.20
## cfs_2       0.76      0.76    0.78      0.22 3.2    0.019 0.0108  0.21
## cfs_3       0.75      0.76    0.77      0.22 3.1    0.020 0.0113  0.21
## cfs_4       0.74      0.75    0.77      0.21 3.0    0.020 0.0121  0.21
## cfs_5       0.75      0.76    0.78      0.22 3.2    0.020 0.0123  0.21
## cfs_6       0.74      0.74    0.76      0.21 2.9    0.020 0.0114  0.21
## cfs_7       0.74      0.75    0.76      0.21 3.0    0.020 0.0116  0.20
## cfs_8       0.76      0.76    0.77      0.22 3.2    0.019 0.0097  0.22
## cfs_9       0.76      0.76    0.78      0.23 3.2    0.019 0.0110  0.22
## cfs_10      0.74      0.75    0.77      0.21 3.0    0.020 0.0126  0.20
## cfs_11      0.75      0.76    0.78      0.22 3.1    0.019 0.0120  0.22
## cfs_12      0.73      0.74    0.76      0.20 2.8    0.022 0.0122  0.20
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean   sd
## cfs_1  350  0.61  0.63  0.59   0.51  4.7 1.05
## cfs_2  350  0.49  0.46  0.38   0.34  4.2 1.25
## cfs_3  350  0.54  0.50  0.43   0.38  4.2 1.41
## cfs_4  350  0.55  0.56  0.51   0.43  4.1 1.00
## cfs_5  350  0.48  0.47  0.38   0.35  4.4 1.10
## cfs_6  350  0.58  0.59  0.56   0.47  4.8 0.99
## cfs_7  350  0.54  0.56  0.52   0.43  4.3 0.93
## cfs_8  350  0.46  0.47  0.42   0.33  4.2 1.11
## cfs_9  350  0.42  0.44  0.36   0.28  4.2 1.05
## cfs_10 350  0.56  0.54  0.48   0.43  4.3 1.21
## cfs_11 350  0.46  0.49  0.41   0.34  5.0 0.98
## cfs_12 350  0.66  0.65  0.61   0.55  3.8 1.24
## 
## Non missing response frequency for each item
##           1    2    3    4    5    6 miss
## cfs_1  0.00 0.02 0.11 0.31 0.30 0.26    0
## cfs_2  0.03 0.06 0.21 0.31 0.24 0.16    0
## cfs_3  0.06 0.06 0.18 0.24 0.26 0.20    0
## cfs_4  0.01 0.06 0.15 0.48 0.22 0.08    0
## cfs_5  0.01 0.03 0.16 0.29 0.34 0.17    0
## cfs_6  0.00 0.02 0.06 0.35 0.29 0.28    0
## cfs_7  0.00 0.03 0.11 0.46 0.28 0.11    0
## cfs_8  0.02 0.03 0.19 0.39 0.23 0.14    0
## cfs_9  0.01 0.04 0.18 0.45 0.19 0.13    0
## cfs_10 0.03 0.05 0.15 0.31 0.29 0.17    0
## cfs_11 0.00 0.01 0.04 0.27 0.29 0.38    0
## cfs_12 0.05 0.08 0.25 0.35 0.18 0.10    0
omega(poly, nfactors = 6, fm = "pa", sl=T, rotation="oblimin")
## maximum iteration exceeded
## Omega 
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip, 
##     digits = digits, title = title, sl = sl, labels = labels, 
##     plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option, 
##     covar = covar, rotation = "oblimin")
## Alpha:                 0.79 
## G.6:                   0.81 
## Omega Hierarchical:    0.6 
## Omega H asymptotic:    0.69 
## Omega Total            0.86 
## 
## Schmid Leiman Factor loadings greater than  0.2 
##           g   F1*   F2*   F3*   F4*   F5*   F6*   h2   u2   p2
## cfs_1  0.60              0.34                   0.51 0.49 0.70
## cfs_2  0.28  0.52                          0.26 0.43 0.57 0.18
## cfs_3  0.30  0.56                               0.46 0.54 0.20
## cfs_4  0.51                    0.58             0.60 0.40 0.44
## cfs_5  0.30  0.41                               0.28 0.72 0.32
## cfs_6  0.57                    0.26        0.50 0.68 0.32 0.49
## cfs_7  0.41        0.68                         0.64 0.36 0.27
## cfs_8  0.37        0.53                   -0.24 0.55 0.45 0.25
## cfs_9  0.39                          0.72       0.68 0.32 0.23
## cfs_10 0.42  0.36                               0.39 0.61 0.45
## cfs_11 0.49        0.21  0.31                   0.41 0.59 0.60
## cfs_12 0.47  0.36                               0.41 0.59 0.54
## 
## With eigenvalues of:
##    g  F1*  F2*  F3*  F4*  F5*  F6* 
## 2.31 1.04 0.82 0.27 0.48 0.61 0.46 
## 
## general/max  2.23   max/min =   3.84
## mean percent general =  0.39    with sd =  0.17 and cv of  0.44 
## Explained Common Variance of the general factor =  0.39 
## 
## The degrees of freedom are 9  and the fit is  0.01 
## 
## The root mean square of the residuals is  0.01 
## The df corrected root mean square of the residuals is  0.02
## 
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 54  and the fit is  0.88 
## 
## The root mean square of the residuals is  0.11 
## The df corrected root mean square of the residuals is  0.12 
## 
## Measures of factor score adequacy             
##                                                  g  F1*  F2*   F3*   F4*  F5*
## Correlation of scores with factors            0.80 0.76 0.78  0.47  0.66 0.77
## Multiple R square of scores with factors      0.65 0.58 0.61  0.22  0.43 0.59
## Minimum correlation of factor score estimates 0.29 0.15 0.22 -0.55 -0.13 0.19
##                                                F6*
## Correlation of scores with factors            0.71
## Multiple R square of scores with factors      0.51
## Minimum correlation of factor score estimates 0.02
## 
##  Total, General and Subset omega for each subset
##                                                  g  F1*  F2*  F3*  F4*  F5*
## Omega total for total scores and subscales    0.86 0.72 0.69 0.58 0.60 0.68
## Omega general for total scores and subscales  0.60 0.28 0.20 0.43 0.26 0.16
## Omega group for total scores and subscales    0.18 0.44 0.49 0.15 0.34 0.52
##                                                F6*
## Omega total for total scores and subscales    0.58
## Omega general for total scores and subscales  0.33
## Omega group for total scores and subscales    0.25
#Remove useless variables
rm(pa, poly, fit_nflex, cfa_cfs, model_cfs)


3. CFA for the attention switching sub-scale

cfa_data = data_cleaned_scale[,11:20]
#Compute CFA
cfa_aq = cfa(model_aq, data=cfa_data, estimator="DWLS", ordered = c("att_switch_1","att_switch_2","att_switch_3","att_switch_4","att_switch_5","att_switch_6","att_switch_7","att_switch_8","att_switch_9","att_switch_10"))
fitmeasures(cfa_aq)
##                npar                fmin               chisq                  df 
##              40.000               0.360             251.753              35.000 
##              pvalue      baseline.chisq         baseline.df     baseline.pvalue 
##               0.000             722.203              45.000               0.000 
##                 cfi                 tli                nnfi                 rfi 
##               0.680               0.588               0.588               0.552 
##                 nfi                pnfi                 ifi                 rni 
##               0.651               0.507               0.685               0.680 
##               rmsea      rmsea.ci.lower      rmsea.ci.upper        rmsea.pvalue 
##               0.133               0.118               0.149               0.000 
##                 rmr          rmr_nomean                srmr        srmr_bentler 
##               0.108               0.118               0.118               0.108 
## srmr_bentler_nomean                crmr         crmr_nomean          srmr_mplus 
##               0.118               0.118               0.130                  NA 
##   srmr_mplus_nomean               cn_05               cn_01                 gfi 
##                  NA              70.039              80.492               0.941 
##                agfi                pgfi                 mfi 
##               0.875               0.439               0.733
summary(cfa_aq, fit.measures=T)
## lavaan 0.6-6 ended normally after 21 iterations
## 
##   Estimator                                       DWLS
##   Optimization method                           NLMINB
##   Number of free parameters                         40
##                                                       
##   Number of observations                           350
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                               251.753
##   Degrees of freedom                                35
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                               722.203
##   Degrees of freedom                                45
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.680
##   Tucker-Lewis Index (TLI)                       0.588
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.133
##   90 Percent confidence interval - lower         0.118
##   90 Percent confidence interval - upper         0.149
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.118
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model        Unstructured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   attention =~                                        
##     att_switch_1      1.000                           
##     att_switch_2      0.574    0.114    5.046    0.000
##     att_switch_3      0.719    0.123    5.860    0.000
##     att_switch_4      0.486    0.108    4.507    0.000
##     att_switch_5      1.364    0.182    7.483    0.000
##     att_switch_6      1.026    0.146    7.007    0.000
##     att_switch_7      0.888    0.140    6.340    0.000
##     att_switch_8      0.937    0.141    6.644    0.000
##     att_switch_9      0.803    0.131    6.153    0.000
##     att_switch_10     1.157    0.160    7.241    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .att_switch_1      0.000                           
##    .att_switch_2      0.000                           
##    .att_switch_3      0.000                           
##    .att_switch_4      0.000                           
##    .att_switch_5      0.000                           
##    .att_switch_6      0.000                           
##    .att_switch_7      0.000                           
##    .att_switch_8      0.000                           
##    .att_switch_9      0.000                           
##    .att_switch_10     0.000                           
##     attention         0.000                           
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     att_swtch_1|t1   -1.508    0.104  -14.543    0.000
##     att_swtch_1|t2    0.358    0.069    5.219    0.000
##     att_swtch_1|t3    1.487    0.102   14.521    0.000
##     att_swtch_2|t1   -1.161    0.086  -13.435    0.000
##     att_swtch_2|t2    0.021    0.067    0.320    0.749
##     att_swtch_2|t3    1.234    0.089   13.805    0.000
##     att_swtch_3|t1   -1.425    0.099  -14.425    0.000
##     att_swtch_3|t2   -0.617    0.072   -8.578    0.000
##     att_swtch_3|t3    0.670    0.073    9.196    0.000
##     att_swtch_4|t1   -0.772    0.075  -10.311    0.000
##     att_swtch_4|t2    0.180    0.067    2.667    0.008
##     att_swtch_4|t3    1.282    0.092   14.006    0.000
##     att_swtch_5|t1   -1.234    0.089  -13.805    0.000
##     att_swtch_5|t2   -0.381    0.069   -5.537    0.000
##     att_swtch_5|t3    0.744    0.074   10.010    0.000
##     att_swtch_6|t1   -1.487    0.102  -14.521    0.000
##     att_swtch_6|t2   -0.652    0.073   -8.991    0.000
##     att_swtch_6|t3    0.821    0.076   10.808    0.000
##     att_swtch_7|t1   -1.902    0.136  -13.937    0.000
##     att_swtch_7|t2   -0.811    0.076  -10.709    0.000
##     att_swtch_7|t3    0.452    0.070    6.488    0.000
##     att_swtch_8|t1   -1.860    0.132  -14.085    0.000
##     att_swtch_8|t2   -0.753    0.074  -10.110    0.000
##     att_swtch_8|t3    0.524    0.071    7.433    0.000
##     att_swtch_9|t1   -0.661    0.073   -9.093    0.000
##     att_swtch_9|t2    0.460    0.070    6.593    0.000
##     att_swtch_9|t3    1.350    0.095   14.240    0.000
##     att_swtch_10|1   -0.960    0.080  -12.055    0.000
##     att_swtch_10|2    0.122    0.067    1.814    0.070
##     att_swtch_10|3    1.068    0.083   12.860    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .att_switch_1      0.789                           
##    .att_switch_2      0.930                           
##    .att_switch_3      0.891                           
##    .att_switch_4      0.950                           
##    .att_switch_5      0.607                           
##    .att_switch_6      0.778                           
##    .att_switch_7      0.833                           
##    .att_switch_8      0.815                           
##    .att_switch_9      0.864                           
##    .att_switch_10     0.717                           
##     attention         0.211    0.042    4.977    0.000
## 
## Scales y*:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     att_switch_1      1.000                           
##     att_switch_2      1.000                           
##     att_switch_3      1.000                           
##     att_switch_4      1.000                           
##     att_switch_5      1.000                           
##     att_switch_6      1.000                           
##     att_switch_7      1.000                           
##     att_switch_8      1.000                           
##     att_switch_9      1.000                           
##     att_switch_10     1.000
# test with efa stucture
##compute polychoric correlation and assumptions
poly = polychoric(data_cleaned_scale[,c(11:13,15:20)])
## Warning in matpLower(x, nvar, gminx, gmaxx, gminy, gmaxy): 7 cells were adjusted
## for 0 values using the correction for continuity. Examine your data carefully.
poly = poly$rho
cortest.bartlett(R = poly, n = nrow(data_cleaned_scale))
## $chisq
## [1] 487.1984
## 
## $p.value
## [1] 0.00000000000000000000000000000000000000000000000000000000000000000000000000000001819305
## 
## $df
## [1] 36
KMO(poly)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = poly)
## Overall MSA =  0.69
## MSA for each item = 
##  att_switch_1  att_switch_2  att_switch_3  att_switch_5  att_switch_6 
##          0.74          0.65          0.66          0.74          0.62 
##  att_switch_7  att_switch_8  att_switch_9 att_switch_10 
##          0.72          0.70          0.70          0.69
det(poly)
## [1] 0.2437804
##Compute parallel analysis for number of factors
pa = fa.parallel(poly, fm="pa", fa="fa", main = "Scree Plot", se.bars = T, n.obs = nrow(data_cleaned_scale))

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA
# EFA with polychoric correlation and R² from multiple regression as original communalitites
fit_nflex = fa(data_cleaned_scale[,11:20], cor="poly", nfactors = pa$nfact, fm="pa", rotate="varimax", residuals = T, correct=F, SMC=T)
fa.diagram(fit_nflex, sort = T, errors = T, labels = T, digits = 2)

fa.plot(fit_nflex) #plot the loadings of each factors

fit_nflex
## Factor Analysis using method =  pa
## Call: fa(r = data_cleaned_scale[, 11:20], nfactors = pa$nfact, rotate = "varimax", 
##     residuals = T, SMC = T, fm = "pa", cor = "poly", correct = F)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                PA1   PA2   PA3   h2   u2 com
## att_switch_1  0.62  0.05 -0.05 0.39 0.61 1.0
## att_switch_2  0.00  0.20  0.41 0.21 0.79 1.5
## att_switch_3  0.02  0.59 -0.03 0.34 0.66 1.0
## att_switch_4  0.08 -0.11  0.62 0.40 0.60 1.1
## att_switch_5  0.54  0.14  0.32 0.41 0.59 1.8
## att_switch_6  0.08  0.83  0.05 0.70 0.30 1.0
## att_switch_7  0.45  0.22 -0.08 0.26 0.74 1.5
## att_switch_8  0.04  0.48  0.35 0.36 0.64 1.9
## att_switch_9  0.55 -0.10  0.01 0.32 0.68 1.1
## att_switch_10 0.55 -0.01  0.28 0.38 0.62 1.5
## 
##                        PA1  PA2  PA3
## SS loadings           1.50 1.40 0.87
## Proportion Var        0.15 0.14 0.09
## Cumulative Var        0.15 0.29 0.38
## Proportion Explained  0.40 0.37 0.23
## Cumulative Proportion 0.40 0.77 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  45  and the objective function was  1.63 with Chi Square of  560.67
## The degrees of freedom for the model are 18  and the objective function was  0.11 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.05 
## 
## The harmonic number of observations is  350 with the empirical chi square  33.2  with prob <  0.016 
## The total number of observations was  350  with Likelihood Chi Square =  38.37  with prob <  0.0035 
## 
## Tucker Lewis Index of factoring reliability =  0.901
## RMSEA index =  0.057  and the 90 % confidence intervals are  0.032 0.082
## BIC =  -67.07
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy             
##                                                    PA1  PA2  PA3
## Correlation of (regression) scores with factors   0.83 0.87 0.75
## Multiple R square of scores with factors          0.69 0.76 0.56
## Minimum correlation of possible factor scores     0.37 0.53 0.11
#Check if a general factor can explain the 3 factors
bassAckward(poly, cor="poly", nfactors = c(1,3), fm = "pa")

## 
## Call: bassAckward(r = poly, nfactors = c(1, 3), fm = "pa", cor = "poly")
## 
##  1 F1
##  2 F2
## Use print with the short = FALSE option to see the correlations, or use the summary command.
##Compute alpha and omega for internal reliability
psych::alpha(data_cleaned_scale[,11:20])
## 
## Reliability analysis   
## Call: psych::alpha(x = data_cleaned_scale[, 11:20])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
##        0.6       0.6    0.63      0.13 1.5 0.032  2.6 0.4     0.13
## 
##  lower alpha upper     95% confidence boundaries
## 0.54 0.6 0.66 
## 
##  Reliability if an item is dropped:
##               raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## att_switch_1       0.57      0.57    0.60      0.13 1.3    0.034 0.015  0.13
## att_switch_2       0.59      0.59    0.61      0.14 1.4    0.033 0.017  0.14
## att_switch_3       0.59      0.59    0.61      0.14 1.5    0.032 0.013  0.15
## att_switch_4       0.60      0.60    0.62      0.14 1.5    0.032 0.015  0.13
## att_switch_5       0.53      0.54    0.57      0.12 1.2    0.038 0.016  0.11
## att_switch_6       0.57      0.57    0.58      0.13 1.3    0.034 0.013  0.13
## att_switch_7       0.57      0.57    0.60      0.13 1.4    0.034 0.016  0.12
## att_switch_8       0.57      0.57    0.59      0.13 1.3    0.035 0.016  0.11
## att_switch_9       0.59      0.59    0.61      0.14 1.4    0.033 0.014  0.13
## att_switch_10      0.55      0.56    0.58      0.12 1.3    0.036 0.015  0.12
## 
##  Item statistics 
##                 n raw.r std.r r.cor r.drop mean   sd
## att_switch_1  350  0.45  0.48  0.38   0.29  2.4 0.71
## att_switch_2  350  0.41  0.41  0.28   0.22  2.5 0.85
## att_switch_3  350  0.40  0.41  0.30   0.19  2.9 0.86
## att_switch_4  350  0.39  0.36  0.22   0.17  2.3 0.93
## att_switch_5  350  0.61  0.59  0.54   0.43  2.8 0.92
## att_switch_6  350  0.48  0.50  0.44   0.30  2.9 0.81
## att_switch_7  350  0.46  0.47  0.37   0.28  3.1 0.78
## att_switch_8  350  0.48  0.49  0.40   0.30  3.0 0.79
## att_switch_9  350  0.44  0.43  0.31   0.22  2.2 0.91
## att_switch_10 350  0.55  0.53  0.46   0.35  2.4 0.93
## 
## Non missing response frequency for each item
##                  1    2    3    4 miss
## att_switch_1  0.07 0.57 0.29 0.07    0
## att_switch_2  0.12 0.39 0.38 0.11    0
## att_switch_3  0.08 0.19 0.48 0.25    0
## att_switch_4  0.22 0.35 0.33 0.10    0
## att_switch_5  0.11 0.24 0.42 0.23    0
## att_switch_6  0.07 0.19 0.54 0.21    0
## att_switch_7  0.03 0.18 0.47 0.33    0
## att_switch_8  0.03 0.19 0.47 0.30    0
## att_switch_9  0.25 0.42 0.23 0.09    0
## att_switch_10 0.17 0.38 0.31 0.14    0
omega(poly, nfactors = 4, fm = "pa", sl=T, rotation="none")
## maximum iteration exceeded
## maximum iteration exceeded
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in cov2cor(t(w) %*% r %*% w): diag(.) had 0 or NA entries; non-finite
## result is doubtful

## Omega 
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip, 
##     digits = digits, title = title, sl = sl, labels = labels, 
##     plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option, 
##     covar = covar, rotation = "none")
## Alpha:                 0.64 
## G.6:                   0.67 
## Omega Hierarchical:    0.52 
## Omega H asymptotic:    0.68 
## Omega Total            0.76 
## 
## Schmid Leiman Factor loadings greater than  0.2 
##                   g   F1*   F2*   F3*   F4*   h2   u2   p2
## att_switch_1   0.44        0.40             0.34 0.66 0.57
## att_switch_2                           0.71 0.53 0.47 0.04
## att_switch_3         0.53                   0.30 0.70 0.03
## att_switch_5   0.75                         0.43 0.57 1.30
## att_switch_6         0.87                   0.78 0.22 0.02
## att_switch_7   0.33  0.31  0.25       -0.20 0.26 0.74 0.41
## att_switch_8   0.33  0.39 -0.22             0.32 0.68 0.34
## att_switch_9   0.28        0.65             0.50 0.50 0.15
## att_switch_10  0.83                         0.51 0.49 1.35
## 
## With eigenvalues of:
##    g  F1*  F2*  F3*  F4* 
## 1.78 1.31 0.70 0.00 0.56 
## 
## general/max  1.36   max/min =   Inf
## mean percent general =  0.47    with sd =  0.52 and cv of  1.12 
## Explained Common Variance of the general factor =  0.41 
## 
## The degrees of freedom are 6  and the fit is  0.02 
## 
## The root mean square of the residuals is  0.01 
## The df corrected root mean square of the residuals is  0.03
## 
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 27  and the fit is  0.86 
## 
## The root mean square of the residuals is  0.15 
## The df corrected root mean square of the residuals is  0.17 
## 
## Measures of factor score adequacy             
##                                                  g  F1*  F2* F3*  F4*
## Correlation of scores with factors            0.95 0.91 0.73   0 0.74
## Multiple R square of scores with factors      0.91 0.83 0.54   0 0.55
## Minimum correlation of factor score estimates 0.82 0.66 0.08  -1 0.09
## 
##  Total, General and Subset omega for each subset
##                                                  g  F1*  F2* F3*  F4*
## Omega total for total scores and subscales    0.76 0.83 0.59  NA 0.52
## Omega general for total scores and subscales  0.52 0.48 0.19  NA 0.02
## Omega group for total scores and subscales    0.28 0.35 0.40  NA 0.50
#Remove useless variables
rm(pa, poly, fit_nflex, cfa_aq, model_aq)


4. CFA for the cognitive rigidity sub-scale

cfa_data = data_cleaned_scale[,7:10]
#Compute CFA
cfa_rtc = cfa(model_rtc, data=cfa_data, estimator="DWLS", ordered=c("cognitive_rigidity_1","cognitive_rigidity_2","cognitive_rigidity_3","cognitive_rigidity_4") )
summary(cfa_rtc, fit.measures=T)
## lavaan 0.6-6 ended normally after 13 iterations
## 
##   Estimator                                       DWLS
##   Optimization method                           NLMINB
##   Number of free parameters                         24
##                                                       
##   Number of observations                           350
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                17.818
##   Degrees of freedom                                 2
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                               721.928
##   Degrees of freedom                                 6
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.978
##   Tucker-Lewis Index (TLI)                       0.934
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.151
##   90 Percent confidence interval - lower         0.092
##   90 Percent confidence interval - upper         0.218
##   P-value RMSEA <= 0.05                          0.004
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.054
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model        Unstructured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   rigidite =~                                         
##     cogntv_rgdty_1    1.000                           
##     cogntv_rgdty_2    0.873    0.079   11.109    0.000
##     cogntv_rgdty_3    1.146    0.110   10.425    0.000
##     cogntv_rgdty_4    1.021    0.096   10.624    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .cogntv_rgdty_1    0.000                           
##    .cogntv_rgdty_2    0.000                           
##    .cogntv_rgdty_3    0.000                           
##    .cogntv_rgdty_4    0.000                           
##     rigidite          0.000                           
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     cgntv_rgdt_1|1   -1.531    0.105  -14.560    0.000
##     cgntv_rgdt_1|2   -0.983    0.080  -12.239    0.000
##     cgntv_rgdt_1|3   -0.165    0.067   -2.454    0.014
##     cgntv_rgdt_1|4    0.801    0.076   10.610    0.000
##     cgntv_rgdt_1|5    1.902    0.136   13.937    0.000
##     cgntv_rgdt_2|1   -1.631    0.112  -14.552    0.000
##     cgntv_rgdt_2|2   -0.831    0.076  -10.906    0.000
##     cgntv_rgdt_2|3    0.021    0.067    0.320    0.749
##     cgntv_rgdt_2|4    0.842    0.076   11.004    0.000
##     cgntv_rgdt_2|5    1.631    0.112   14.552    0.000
##     cgntv_rgdt_3|1   -1.605    0.110  -14.566    0.000
##     cgntv_rgdt_3|2   -0.734    0.074   -9.909    0.000
##     cgntv_rgdt_3|3    0.151    0.067    2.241    0.025
##     cgntv_rgdt_3|4    1.147    0.086   13.357    0.000
##     cgntv_rgdt_3|5    1.998    0.148   13.538    0.000
##     cgntv_rgdt_4|1   -1.998    0.148  -13.538    0.000
##     cgntv_rgdt_4|2   -1.120    0.085  -13.196    0.000
##     cgntv_rgdt_4|3   -0.405    0.069   -5.854    0.000
##     cgntv_rgdt_4|4    0.894    0.078   11.489    0.000
##     cgntv_rgdt_4|5    1.821    0.128   14.205    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .cogntv_rgdty_1    0.586                           
##    .cogntv_rgdty_2    0.684                           
##    .cogntv_rgdty_3    0.455                           
##    .cogntv_rgdty_4    0.568                           
##     rigidite          0.414    0.050    8.249    0.000
## 
## Scales y*:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     cogntv_rgdty_1    1.000                           
##     cogntv_rgdty_2    1.000                           
##     cogntv_rgdty_3    1.000                           
##     cogntv_rgdty_4    1.000
fitmeasures(cfa_rtc)
##                npar                fmin               chisq                  df 
##              24.000               0.025              17.818               2.000 
##              pvalue      baseline.chisq         baseline.df     baseline.pvalue 
##               0.000             721.928               6.000               0.000 
##                 cfi                 tli                nnfi                 rfi 
##               0.978               0.934               0.934               0.926 
##                 nfi                pnfi                 ifi                 rni 
##               0.975               0.325               0.978               0.978 
##               rmsea      rmsea.ci.lower      rmsea.ci.upper        rmsea.pvalue 
##               0.151               0.092               0.218               0.004 
##                 rmr          rmr_nomean                srmr        srmr_bentler 
##               0.046               0.054               0.054               0.046 
## srmr_bentler_nomean                crmr         crmr_nomean          srmr_mplus 
##               0.054               0.054               0.070                  NA 
##   srmr_mplus_nomean               cn_05               cn_01                 gfi 
##                  NA             118.357             181.406               0.995 
##                agfi                pgfi                 mfi 
##               0.933               0.077               0.978
#Compute alpha and omega)
poly = polychoric(data_cleaned_scale[,7:10])
## Warning in matpLower(x, nvar, gminx, gmaxx, gminy, gmaxy): 6 cells were adjusted
## for 0 values using the correction for continuity. Examine your data carefully.
poly = poly$rho
psych::alpha(data_cleaned_scale[,7:10])
## 
## Reliability analysis   
## Call: psych::alpha(x = data_cleaned_scale[, 7:10])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.71      0.72    0.67      0.39 2.5 0.025  3.5 0.83     0.41
## 
##  lower alpha upper     95% confidence boundaries
## 0.66 0.71 0.76 
## 
##  Reliability if an item is dropped:
##                      raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r
## cognitive_rigidity_1      0.65      0.66    0.57      0.39 1.9    0.032 0.0051
## cognitive_rigidity_2      0.69      0.70    0.61      0.43 2.3    0.028 0.0017
## cognitive_rigidity_3      0.60      0.61    0.53      0.34 1.6    0.037 0.0131
## cognitive_rigidity_4      0.65      0.65    0.57      0.38 1.8    0.033 0.0141
##                      med.r
## cognitive_rigidity_1  0.39
## cognitive_rigidity_2  0.44
## cognitive_rigidity_3  0.32
## cognitive_rigidity_4  0.44
## 
##  Item statistics 
##                        n raw.r std.r r.cor r.drop mean  sd
## cognitive_rigidity_1 350  0.73  0.73  0.60   0.49  3.6 1.2
## cognitive_rigidity_2 350  0.70  0.69  0.52   0.43  3.5 1.2
## cognitive_rigidity_3 350  0.78  0.78  0.68   0.58  3.3 1.1
## cognitive_rigidity_4 350  0.72  0.74  0.60   0.51  3.7 1.0
## 
## Non missing response frequency for each item
##                         1    2    3    4    5    6 miss
## cognitive_rigidity_1 0.06 0.10 0.27 0.35 0.18 0.03    0
## cognitive_rigidity_2 0.05 0.15 0.31 0.29 0.15 0.05    0
## cognitive_rigidity_3 0.05 0.18 0.33 0.31 0.10 0.02    0
## cognitive_rigidity_4 0.02 0.11 0.21 0.47 0.15 0.03    0
omega(poly, nfactors = 1, fm = "pa", sl=T, rotation="none")
## Omega_h for 1 factor is not meaningful, just omega_t
## Warning in schmid(m, nfactors, fm, digits, rotate = rotate, n.obs = n.obs, :
## Omega_h and Omega_asymptotic are not meaningful with one factor
## Omega 
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip, 
##     digits = digits, title = title, sl = sl, labels = labels, 
##     plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option, 
##     covar = covar, rotation = "none")
## Alpha:                 0.71 
## G.6:                   0.66 
## Omega Hierarchical:    0.71 
## Omega H asymptotic:    1 
## Omega Total            0.71 
## 
## Schmid Leiman Factor loadings greater than  0.2 
##                         g  F1*   h2   u2 p2
## cognitive_rigidity_1 0.61      0.37 0.63  1
## cognitive_rigidity_2 0.54      0.29 0.71  1
## cognitive_rigidity_3 0.70      0.49 0.51  1
## cognitive_rigidity_4 0.62      0.38 0.62  1
## 
## With eigenvalues of:
##   g F1* 
## 1.5 0.0 
## 
## general/max  13884037164773592   max/min =   1
## mean percent general =  1    with sd =  0 and cv of  0 
## Explained Common Variance of the general factor =  1 
## 
## The degrees of freedom are 2  and the fit is  0.06 
## 
## The root mean square of the residuals is  0.06 
## The df corrected root mean square of the residuals is  0.1
## 
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 2  and the fit is  0.06 
## 
## The root mean square of the residuals is  0.06 
## The df corrected root mean square of the residuals is  0.1 
## 
## Measures of factor score adequacy             
##                                                  g F1*
## Correlation of scores with factors            0.85   0
## Multiple R square of scores with factors      0.72   0
## Minimum correlation of factor score estimates 0.45  -1
## 
##  Total, General and Subset omega for each subset
##                                                  g  F1*
## Omega total for total scores and subscales    0.71 0.71
## Omega general for total scores and subscales  0.71 0.71
## Omega group for total scores and subscales    0.00 0.00
#Remove useless variables
rm(poly, cfa_rtc, model_rtc)

VI. Scale validation

1. Convergence

a. Sum computation

#Create columns with sum of each item by factors
data_cleaned_wcst$sum_nflex = apply(data_cleaned_wcst[,c("nflex_2","nflex_3","nflex_5","nflex_7")], 1, sum)
data_cleaned_scale$sum_nflex = apply(data_cleaned_scale[,c("nflex_2","nflex_3","nflex_5","nflex_7")], 1, sum)
data_cleaned_scale$sum_as = apply(data_cleaned_scale[,c(11:20)], 1, sum)
data_cleaned_scale$sum_cr = apply(data_cleaned_scale[,c(7:10)], 1, sum)
data_cleaned_scale$sum_cfs = apply(data_cleaned_scale[,c(31:42)], 1, sum)

#Compute outliers for wcst using boxplot
outliers<-which(data_cleaned_wcst$pourcentage %in% c(boxplot.stats(data_cleaned_wcst$pourcentage)$out))
nflex_wcst = data_cleaned_wcst[-c(outliers),]

b. validity

# PLot of scatterpoint with regression and loess lines (loess is used in timeseries analysis for exemple, and also to check if the relation between 2 variables is linear or not. A linear regression could be used where a quadratic relation fit best to the data ! )
# Add ggplotly(ggplot variable) to get an interactive graphic. Exemple : ggplotly(a)
a = ggplot(data_cleaned_scale, aes(x=sum_nflex, y=sum_cr))+
  geom_point(alpha=.2)+
  geom_smooth(method="lm", se=F, color="#13a4ba")+
  geom_smooth(method="loess", se=F, color="#1334ba")+
  stat_cor(label.x = 21, label.y = 30, digits = 2) +
  theme_apa() +
  xlab("GCF") +
  ylab("RTC-RC")

b = ggplot(data_cleaned_scale, aes(x=sum_nflex, y=sum_as))+
  geom_point(alpha=.2)+
  geom_smooth(method="lm", se=F, color="#13a4ba")+
  geom_smooth(method="loess", se=F, color="#1334ba")+
  stat_cor(label.x = 21, label.y = 40, digits = 2) +
  theme_apa() +
  xlab("GCF") +
  ylab("AQ-AS")

c = ggplot(data_cleaned_scale, aes(x=sum_nflex, y=sum_cfs))+
  stat_cor(label.x = 21, label.y = 80, digits = 2) +
  geom_point(alpha=.2)+
  geom_smooth(method="lm", se=F, color="#13a4ba")+
  geom_smooth(method="loess", se=F, color="#1334ba")+
  theme_apa() +
  xlab("GCF") +
  ylab("CFS")

d = ggplot(nflex_wcst, aes(x=sum_nflex, y=pourcentage))+
  geom_point(alpha=.2)+
  geom_smooth(method="lm", se=F, color="#13a4ba")+
  geom_smooth(method="loess", se=F, color="#1334ba")+
  stat_cor(label.x = 20, label.y = 30, digits = 2) +
  theme_apa() +
  xlab("GCF") +
  ylab("WCST (%)")

figure = ggarrange(d,c,b,a, ncol=2, nrow=2, label.x="GCF")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
figure

rcorr(x = nflex_wcst$pourcentage, y=nflex_wcst$sum_nflex)
##       x     y
## x  1.00 -0.04
## y -0.04  1.00
## 
## n= 80 
## 
## 
## P
##   x      y     
## x        0.7278
## y 0.7278
rcorr(x = data_cleaned_scale$sum_as, y=data_cleaned_scale$sum_nflex)
##      x    y
## x 1.00 0.54
## y 0.54 1.00
## 
## n= 350 
## 
## 
## P
##   x  y 
## x     0
## y  0
rcorr(x = data_cleaned_scale$sum_cr, y=data_cleaned_scale$sum_nflex)
##      x    y
## x 1.00 0.07
## y 0.07 1.00
## 
## n= 350 
## 
## 
## P
##   x      y     
## x        0.2084
## y 0.2084
rcorr(x = data_cleaned_scale$sum_cfs, y=data_cleaned_scale$sum_nflex)
##      x    y
## x 1.00 0.35
## y 0.35 1.00
## 
## n= 350 
## 
## 
## P
##   x  y 
## x     0
## y  0
rcorr(x = nflex_wcst$pourcentage, y=nflex_wcst$sum_cfs)
##   x
## x 1
## 
## n= 80 
## 
## 
## P
##   x
## x